#!/usr/bin/python
#coding: utf-8
###############################################################

import re,os
import urllib2
import Page

class Crawler:
	"""Download a page and store the body, headers and a domain guess on it.

	The crawler works on any object exposing a ``url`` attribute; results are
	written back onto that same object as ``html``, ``info`` and (when a match
	is found in the response headers) ``domain``.
	"""

	# Captures "<word>.com" when it is preceded by a dot. Compiled once
	# because it is applied to every response header of every page.
	_DOMAIN_RE = re.compile(r'\.(\w+\.com)')

	def __init__(self):
		# Header *value* only: add_header() supplies the "User-Agent" name,
		# so the value must not repeat it.
		self.user_agent = ("Mozilla/5.0 "
			"(Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.4) "
			"Gecko/20070515 Firefox/2.0.0.4")

	def crawlPage(self,page):
		"""Fetch ``page.url`` and fill in the page object.

		Args:
			page: object with a ``url`` attribute. On success it gains
				``html`` (the raw response body) and ``info`` (the response
				headers object). ``domain`` is set only when some header
				contains text matching ``.<word>.com``; otherwise it is
				left untouched.

		Returns:
			The same ``page`` object that was passed in.

		Raises:
			RuntimeError: if the URL cannot be opened or read.
		"""
		try:
			request = urllib2.Request(page.url)
			request.add_header("User-Agent",self.user_agent)
			response = urllib2.urlopen(request)
			try:
				page.html = response.read()
				page.info = response.info()
			finally:
				# Release the connection even when read()/info() fails.
				response.close()
		except (IOError, ValueError) as err:
			# URLError/HTTPError and socket errors derive from IOError;
			# ValueError covers malformed or unsupported URLs.
			raise RuntimeError("Unable to open %s (%s)" % (page.url, err))

		# Search the textual form of each (name, value) header pair and keep
		# the first "<word>.com" found.
		for header in page.info.items():
			match = self._DOMAIN_RE.search(str(header))
			if match:
				page.domain = match.group(1)
				break
		return page
	
if __name__ == "__main__":
	# Manual smoke run: fetch one search-results page through the crawler.
	# The query string carries percent-encoded non-ASCII bytes and is passed
	# through exactly as written.
	url = "http://china.hx2car.com/search.htm?query=%B1%A6%C2%ED"
	page = Page.Page()
	page.url = url
	crawler = Crawler()
	fetched = crawler.crawlPage(page)
	# The downloaded body is available afterwards as fetched.html.




